Set up

setwd("/home/Data/Genotype/SzCausal_202211/Summary202301/rmarkdown/")
source("../../helper.R") #helper functions and library load
source("../../notearsMClosses.R")
source("../../utils.R")
source("../../GraphFuncs.R")
library(readxl)
library(dplyr)
library(ggpubr)
library(reshape2)
library(ggplot2)
# set.seed(101)
load("featureSelectionRes.RData")

causal graph construction

graphSampling function

  • default n = 20
  • no tears default lambda1 = 0.1

graphConstruction function

  • default edge threshold = 0.2 (top30 in each sampled subgrapgh)
  • default n = 20
  • default n_cutoff = 10

1. Feature selection Focal variable: Diagnosis

Nodes

prot.use = toLabel
lv.use = colnames(res$Z)[which(colSums(res$Z[prot.use,])>0)]

Sample defalut n=20

FocalDx_DxPre = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                 pheno = pheno$Group=="Sz", spinePredict = tscale(spinePredict))
FocalDx_DxPrePH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                 pheno = pheno$Group=="Sz", spinePredict = tscale(spinePredict), pH = pheno$pH)

FocalDx_Pre = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                      spinePredict = tscale(spinePredict))
FocalDx_PrePH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                      spinePredict = tscale(spinePredict), pH = pheno$pH)

FocalDx_Dx = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz")
FocalDx_DxPH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz", pH = pheno$pH)

FocalDx = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use)
FocalDx_pH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                 pH = pheno$pH)

Construct new graph with default threshold=0.2

name = c(prot.use, lv.use, "Spine", "Sz", rownames(spinePredict))
GraphDx_DxPre = graphConstruction(graph_list = FocalDx_DxPre, name = name)
GraphDx_DxPrePH = graphConstruction(graph_list = FocalDx_DxPrePH, name = c(name,"pH"))

name = c(prot.use, lv.use, "Spine", rownames(spinePredict))
GraphDx_Pre = graphConstruction(graph_list = FocalDx_Pre, name = name)
GraphDx_PrePH = graphConstruction(graph_list = FocalDx_PrePH, name = c(name,"pH"))

name = c(prot.use, lv.use, "Spine", "Sz")
GraphDx_Dx = graphConstruction(graph_list = FocalDx_Dx, name = name)
GraphDx_DxPH = graphConstruction(graph_list = FocalDx_DxPH, name = c(name,"pH"))

name = c(prot.use, lv.use, "Spine")
GraphDx = graphConstruction(graph_list = FocalDx, name = name)
GraphDx_pH = graphConstruction(graph_list = FocalDx_pH, name = c(name,"pH"))

Visualize

# graph varType
SYN=length(grep(".syn", prot.use))
Phos=length(grep("p", prot.use))
HOM=length(prot.use)-SYN-Phos
Prot = c(rep("Phos", Phos), rep("Hom", HOM), rep("Syn", SYN))
lenLV = rep("LV",length(lv.use))

Protein features: Focal varaible: Dx

Spine: Avg.spineS

diagnosis, Surrogate Spines

gvarType = c(Prot, lenLV, "Spine", "Sz", rep("PredictSpine",3))
network_visualize(GraphDx_DxPre, gvarType)

Protein features: Focal varaible: Dx

Spine: Avg.spineS

diagnosis, Surrogate Spines, pH

network_visualize(GraphDx_DxPrePH, c(gvarType,"pH"))

Protein features: Focal varaible: Dx

Spine: Avg.spineS

Surrogate Spines

gvarType = c(Prot, lenLV, "Spine", rep("PredictSpine",3))
network_visualize(GraphDx_Pre, gvarType)

Protein features: Focal varaible: Dx

Spine: Avg.spineS

Surrogate Spines, pH

network_visualize(GraphDx_PrePH, c(gvarType,"pH"))

Protein features: Focal varaible: Dx

Spine: Avg.spineS

diagnosis

gvarType = c(Prot, lenLV, "Spine", "Sz")
network_visualize(GraphDx_Dx, gvarType)

Protein features: Focal varaible: Dx

Spine: Avg.spineS

diagnosis, pH

network_visualize(GraphDx_DxPH, c(gvarType,"pH"))

Protein features: Focal varaible: Dx

Spine: Avg.spineS

gvarType = c(Prot, lenLV, "Spine")
network_visualize(GraphDx, gvarType)

Protein features: Focal varaible: Dx

Spine: Avg.spineS

pH

network_visualize(GraphDx_pH, c(gvarType,"pH"))

2. Feature selection Focal variable: Average spine density S

Nodes

prot.use = toLabel.Spine
lv.use = colnames(res$Z)[which(colSums(res$Z[prot.use,])>0)]

Sample defalut n=20

FocalSpine_DxPre = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                 pheno = pheno$Group=="Sz", spinePredict = tscale(spinePredict))
FocalSpine_DxPrePH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                 pheno = pheno$Group=="Sz", spinePredict = tscale(spinePredict), pH = pheno$pH)

FocalSpine_Pre = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                      spinePredict = tscale(spinePredict))
FocalSpine_PrePH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                      spinePredict = tscale(spinePredict), pH = pheno$pH)

FocalSpine_Dx = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz")
FocalSpine_DxPH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz", pH = pheno$pH)

FocalSpine = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use)
FocalSpine_pH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                           pH = pheno$pH)

FocalSpine_AvgDx = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spinePredict)[1,], prot.use = prot.use,
                                 pheno = pheno$Group=="Sz")
FocalSpine_AvgDxPH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spinePredict)[1,], prot.use = prot.use,
                                 pheno = pheno$Group=="Sz", pH = pheno$pH)

Construct new graph with default threshold=0.2

name = c(prot.use, lv.use, "Spine", "Sz", rownames(spinePredict))
GraphSpine_DxPre = graphConstruction(graph_list = FocalSpine_DxPre, name = name)
GraphSpine_DxPrePH = graphConstruction(graph_list = FocalSpine_DxPrePH, name = c(name,"pH"))

name = c(prot.use, lv.use, "Spine", rownames(spinePredict))
GraphSpine_Pre = graphConstruction(graph_list = FocalSpine_Pre, name = name)
GraphSpine_PrePH = graphConstruction(graph_list = FocalSpine_PrePH, name = c(name,"pH"))

name = c(prot.use, lv.use, "Spine", "Sz")
GraphSpine_Dx = graphConstruction(graph_list = FocalSpine_Dx, name = name)
GraphSpine_DxPH = graphConstruction(graph_list = FocalSpine_DxPH, name = c(name,"pH"))
GraphSpine_AvgDx = graphConstruction(graph_list = FocalSpine_AvgDx, name = name)
GraphSpine_AvgDxPH = graphConstruction(graph_list = FocalSpine_AvgDxPH, name = c(name,"pH"))

name = c(prot.use, lv.use, "Spine")
GraphSpine = graphConstruction(graph_list = FocalSpine, name = name)
GraphSpine_pH = graphConstruction(graph_list = FocalSpine_pH, name = c(name,"pH"))

Visualize

# graph varType
SYN=length(grep(".syn", prot.use))
Phos=length(grep("p", prot.use))
HOM=length(prot.use)-SYN-Phos
Prot = c(rep("Phos", Phos), rep("Hom", HOM), rep("Syn", SYN))
lenLV = rep("LV",length(lv.use))

focal variable: avg.spineS

Protein features: Focal varaible: Avg.spineS

Spine: Avg.spineS

diagnosis, Surrogate Spines

gvarType = c(Prot, lenLV, "Spine", "Sz", rep("PredictSpine",3))
network_visualize(GraphSpine_DxPre, gvarType)

Protein features: Focal varaible: Avg.spineS

Spine: Avg.spineS

diagnosis, Surrogate Spines, pH

network_visualize(GraphSpine_DxPrePH, c(gvarType,"pH"))

Protein features: Focal varaible: Avg.spineS

Spine: Avg.spineS

Surrogate Spines

gvarType = c(Prot, lenLV, "Spine", rep("PredictSpine",3))
network_visualize(GraphSpine_Pre, gvarType)

Protein features: Focal varaible: Avg.spineS

Spine: Avg.spineS

Surrogate Spines, pH

network_visualize(GraphSpine_PrePH, c(gvarType,"pH"))

Protein features: Focal varaible: Avg.spineS

Spine: Avg.spineS

diagnosis

gvarType = c(Prot, lenLV, "Spine", "Sz")
network_visualize(GraphSpine_Dx, gvarType)

Protein features: Focal varaible: Avg.spineS

Spine: Avg.spineS

diagnosis, pH

network_visualize(GraphSpine_DxPH, c(gvarType,"pH"))

Protein features: Focal varaible: Avg.spineS

Spine: Surrogate Avg.spineS

diagnosis

network_visualize(GraphSpine_AvgDx, gvarType)

Protein features: Focal varaible: Avg.spineS

Spine: Surrogate Avg.spineS

diagnosis, pH

network_visualize(GraphSpine_AvgDxPH, c(gvarType,"pH"))

Protein features: Focal varaible: Avg.spineS

Spine: Avg.spineS

gvarType = c(Prot, lenLV, "Spine")
network_visualize(GraphSpine, gvarType)

Protein features: Focal varaible: Avg.spineS

Spine: Avg.spineS

pH

network_visualize(GraphSpine_pH, c(gvarType,"pH"))

3. Focal variable: Avg.spineS, group = Sz or group = C

Nodes

prot.use = toLabel.SpineMarginal
lv.use = colnames(res$Z)[which(colSums(res$Z[prot.use,])>0)]

Focal defalut n=20

FocalSpineM_DxPre = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                 pheno = pheno$Group=="Sz", spinePredict = tscale(spinePredict))
FocalSpineM_DxPrePH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                                 pheno = pheno$Group=="Sz", spinePredict = tscale(spinePredict), pH = pheno$pH)

FocalSpineM_Dx = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz")
FocalSpineM_DxPH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz", pH = pheno$pH)

FocalSpineM_AvgDx = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spinePredict)[1,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz")
FocalSpineM_AvgDxPH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spinePredict)[1,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz", pH = pheno$pH)

Construct new graph with default threshold=0.2

name = c(prot.use, lv.use, "Spine", "Sz", rownames(spinePredict))
GraphSpineM_DxPre = graphConstruction(graph_list = FocalSpineM_DxPre, name = name)
GraphSpineM_DxPrePH = graphConstruction(graph_list = FocalSpineM_DxPrePH, name = c(name,"pH"))

name = c(prot.use, lv.use, "Spine", "Sz")
GraphSpineM_Dx = graphConstruction(graph_list = FocalSpineM_Dx, name = name)
GraphSpineM_DxPH = graphConstruction(graph_list = FocalSpineM_DxPH, name = c(name,"pH"))

GraphSpineM_AvgDx = graphConstruction(graph_list = FocalSpineM_AvgDx, name = name)
GraphSpineM_AvgDxPH = graphConstruction(graph_list = FocalSpineM_AvgDxPH, name = c(name,"pH"))

Visualize

# graph varType
Prot = c(rep("spineSz", length(toLabel.SpineSz)), rep("SpineCtrl", length(toLabel.SpineCtrl)))
lenLV = rep("LV",length(lv.use))

focal variable: avg.spineS, in Sz or Ctrl group

Protein features: Focal varaible: Avg.spineS in Sz/C

Spine: Avg.spineS

diagnosis, Surrogate Spines

gvarType = c(Prot, lenLV, "Spine", "Sz", rep("PredictSpine",3))
network_visualize(GraphSpineM_DxPre, gvarType)

Protein features: Focal varaible: Avg.spineS in Sz/C

Spine: Avg.spineS

diagnosis, Surrogate Spines, pH

network_visualize(GraphSpineM_DxPrePH, c(gvarType,"pH"))

Protein features: Focal varaible: Avg.spineS in Sz/C

Spine: Avg.spineS

diagnosis

gvarType = c(Prot, lenLV, "Spine", "Sz")
network_visualize(GraphSpineM_Dx, gvarType)

Protein features: Focal varaible: Avg.spineS in Sz/C

Spine: Avg.spineS

diagnosis, pH

network_visualize(GraphSpineM_DxPH, c(gvarType,"pH"))

Protein features: Focal varaible: Avg.spineS in Sz/C

Spine: Surrogate Avg.spineS

diagnosis

gvarType = c(Prot, lenLV, "Spine", "Sz")
network_visualize(GraphSpineM_AvgDx, gvarType)

Protein features: Focal varaible: Avg.spineS in Sz/C

Spine: Surrogate Avg.spineS

diagnosis, pH

network_visualize(GraphSpineM_AvgDxPH, c(gvarType,"pH"))

6. Focal variable: L3.spineS

prot.use = toLabel.SpineL3
lv.use = colnames(res$Z)[which(colSums(res$Z[prot.use,])>0)]

Focal defalut n=20

FocalL3_Dx = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spinePredict)[2,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz")
FocalL3_DxPH = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spinePredict)[2,], prot.use = prot.use,
                             pheno = pheno$Group=="Sz", pH = pheno$pH)

Construct new graph with default threshold=0.2

name = c(prot.use, lv.use, "Spine", "Sz")
GraphL3_Dx = graphConstruction(graph_list = FocalL3_Dx, name = name)

name = c(prot.use, lv.use, "Spine", "Sz","pH")
GraphL3_DxPH = graphConstruction(graph_list = FocalL3_DxPH, name = name)

Visualize

# graph varType
SYN=length(grep(".syn", prot.use))
Phos=length(grep("p", prot.use))
HOM=length(prot.use)-SYN-Phos
Prot = c(rep("Phos", Phos), rep("Hom", HOM), rep("Syn", SYN))
lenLV = rep("LV",length(lv.use))

focal variable: L3.spineS

Protein features: Focal varaible: L3.spineS

Spine: Surrogate L3.spineS

diagnosis

gvarType = c(Prot, lenLV, "Spine", "Sz")
network_visualize(GraphL3_Dx, gvarType)

Protein features: Focal varaible: L3.spineS

Spine: Surrogate L3.spineS

diagnosis, pH

gvarType = c(Prot, lenLV, "Spine", "Sz", "pH")
network_visualize(GraphL3_DxPH, gvarType)

7. feature selection: Union

prot.use = toLabel.Union
lv.use = colnames(res$Z)[which(colSums(res$Z[prot.use,])>0)]

Focal defalut n=20

FocalUnion_Dx = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz")
FocalUnion_DxPH =graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz", pH = pheno$pH)

Construct new graph with default threshold=0.2

name = c(prot.use, lv.use, "Spine", "Sz")
GraphUnion_Dx = graphConstruction(graph_list = FocalUnion_Dx, name = name)

name = c(prot.use, lv.use, "Spine", "Sz", "pH")
GraphUnion_DxPH = graphConstruction(graph_list = FocalUnion_DxPH, name = name)

Visualize

# graph varType
SYN=length(grep(".syn", prot.use))
Phos=length(grep("p", prot.use))
HOM=length(prot.use)-SYN-Phos
Prot = c(rep("Phos", Phos), rep("Hom", HOM), rep("Syn", SYN))
lenLV = rep("LV",length(lv.use))

Protein features: Focal varaible: Union(Diagnosis, Avg.spineS), ~top 30

Spine: Avg.spineS

diagnosis

gvarType = c(Prot, lenLV, "Spine", "Sz")
network_visualize(GraphUnion_Dx, gvarType)

Protein features: Focal varaible: Union(Diagnosis, Avg.spineS), ~top 30

Spine: Avg.spineS

diagnosis, pH

gvarType = c(Prot, lenLV, "Spine", "Sz", "pH")
network_visualize(GraphUnion_DxPH, gvarType)

8. feature selection: Intersect

prot.use = toLabel.Intersect
lv.use = colnames(res$Z)[which(colSums(res$Z[prot.use,])>0)]

Focal defalut n=20

FocalIntersect_Dx = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz")
FocalIntersect_DxPH =graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
                         pheno = pheno$Group=="Sz", pH = pheno$pH)

Construct new graph with default threshold=0.2

name = c(prot.use, lv.use, "Spine", "Sz")
GraphIntersect_Dx = graphConstruction(graph_list = FocalIntersect_Dx, name = name)

name = c(prot.use, lv.use, "Spine", "Sz", "pH")
GraphIntersect_DxPH = graphConstruction(graph_list = FocalIntersect_DxPH, name = name)

Visualize

# graph varType
SYN=length(grep(".syn", prot.use))
Phos=length(grep("p", prot.use))
HOM=length(prot.use)-SYN-Phos
Prot = c(rep("Phos", Phos), rep("Hom", HOM), rep("Syn", SYN))
lenLV = rep("LV",length(lv.use))

Protein features: Focal varaible: Intersect(Diagnosis, Avg.spineS), ~top 30

Spine: Avg.spineS

diagnosis

gvarType = c(Prot, lenLV, "Spine", "Sz")
network_visualize(GraphIntersect_Dx, gvarType)

Protein features: Focal varaible: Intersect(Diagnosis, Avg.spineS), ~top 30

Spine: Avg.spineS

diagnosis, pH

gvarType = c(Prot, lenLV, "Spine", "Sz", "pH")
network_visualize(GraphIntersect_DxPH, gvarType)

relationship between LV and pH

t_res=tscale(res$B)
LV_pH = data.frame()
for (i in 1:nrow(t_res)) {
    cor=cor.test(t_res[i,], pheno$pH)
    a=t(melt(unlist(cor)))
    LV_pH = rbind(LV_pH, a)
}
rownames(LV_pH)=rownames(t_res)
LV_pH=as.data.frame(LV_pH)
LV_pH$p.value=as.numeric(LV_pH$p.value)
LV_pH$estimate.cor=as.numeric(LV_pH$estimate.cor)
ggscatter(LV_pH, "estimate.cor", "p.value", label = rownames(LV_pH), repel = T,
          xlim = c(-0.5, 0.5),
          ylab = "log10 scaled p-val",
          xlab = "Pearson correlation") +
    scale_y_log10(breaks = c(1e-4, 1e-3, 1e-2, 0.05, 0.1, 0.25, 0.5)) +
    labs(title = "Correlation between LVs and pH")